R Markdown

This is an result for the gene singatures as well as assigned cell type for each cluster based on Danaher et al.

knitr::opts_chunk$set(cache=TRUE)
for( i in 1:length(mean_score[1,])){
print(visualize_me(mean_score[,i],cell_list[i],analysis_results$tsne[c("TSNE.1","TSNE.2")],title=names(cell_list)[i]))
      
# really hard to see
#print(plot(mean_score[,i],type = "h",xlab="Single Cell",ylab=colnames(mean_score)[i]))
h <-hist(mean_score[,i],plot = FALSE)

print(plot(h, freq = TRUE, labels =TRUE, ylim=c(0, 1.2*max(h$counts)),main=c(names(cell_list)[i]," Score per Single Cell"),xlab=paste(sep=" ",names(cell_list)[i],"Expression Level"),ylab="Numbers of cells"))
}

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL
sig_plot <- visualize_clusters(cluster_tsne$cell_type,cluster_tsne[c("TSNE.1","TSNE.2")],title="Danaher cell-type labels",legend_anno= sort(unique(cluster_tsne[,"name_type"])))
#sig_plot <-sig_plot +scale_fill_discrete(name="Cell Type",
#                         breaks=unique(cluster_assignment$cell_type),
#                         labels=sapply(unique(cluster_assignment$cell_type),function(x) names(cell_list[x])))

print(sig_plot)

counter <-1
colnames(score_by_cluster) <- names(cell_list)
apply(score_by_cluster, 1,function(x) {plot(x,ylab="mean expression level",xlab="cell type", main=c("Cluster",counter))
text(x, names(cell_list), cex=0.6, pos=4, col="red")
counter <<- counter + 1})

##  [1]  2  3  4  5  6  7  8  9 10 11

This is a gene exprssion profile for each cell signature for each custer.The top expressed gene signatures of the majority of the cluster are well reprsented (All signatures are expressed among 80% to 90% of the cells in each cluster, except in Cluster 4,5 and 9)

top_sig <- data.frame(t(sapply(unique(all_type_expr_table$Cluster),function(x){ all_type_expr_table[all_type_expr_table$Cluster == x,][which(all_type_expr_table$precent_count[all_type_expr_table$Cluster == x] == max(all_type_expr_table$precent_count[all_type_expr_table$Cluster == x])),] })))
top_sig$Signature <-lapply(top_sig$Signature,function(y){paste(y)})
cbind(top_sig, cluster_assignment = cluster_type[,"name_type"])
##    Cluster               Signature Barcode_count total_Exp avg_non_zero
## 1        1             Macrophages           832      5571     5.435122
## 2        2                 T_cells         11147    103645     4.072815
## 3        3                 T_cells         16150    139048     3.920711
## 4        4             Neutrophils           546      1239     1.998387
## 5        5             Macrophages            34       177     5.205882
## 6        6                 B_cells          2187     21816     4.088456
## 7        7         Cytotoxic_cells          5882    283649     7.918954
## 8        8         Cytotoxic_cells           183      1727     7.256303
## 9        9             Macrophages            10        16          1.6
## 10  10, 10 Macrophages, Mast_cells          1, 1      1, 9         1, 3
##              SD cell_total_count precent_count cluster_assignment
## 1      3.699126              963      86.39668               CD45
## 2      3.460277            11716      95.14339            T_cells
## 3      3.382486            17068      94.62151            T_cells
## 4      2.303681             1768      30.88235               CD45
## 5       3.20775               75      45.33333        Macrophages
## 6      3.623682             2282      95.83699            B_cells
## 7      5.924657             5900      99.69492    Cytotoxic_cells
## 8      4.297662              202      90.59406    Cytotoxic_cells
## 9      1.897367               25            40        Macrophages
## 10 NA, 3.464102             1, 1      100, 100         Mast_cells
top_sig
##    Cluster               Signature Barcode_count total_Exp avg_non_zero
## 1        1             Macrophages           832      5571     5.435122
## 2        2                 T_cells         11147    103645     4.072815
## 3        3                 T_cells         16150    139048     3.920711
## 4        4             Neutrophils           546      1239     1.998387
## 5        5             Macrophages            34       177     5.205882
## 6        6                 B_cells          2187     21816     4.088456
## 7        7         Cytotoxic_cells          5882    283649     7.918954
## 8        8         Cytotoxic_cells           183      1727     7.256303
## 9        9             Macrophages            10        16          1.6
## 10  10, 10 Macrophages, Mast_cells          1, 1      1, 9         1, 3
##              SD cell_total_count precent_count
## 1      3.699126              963      86.39668
## 2      3.460277            11716      95.14339
## 3      3.382486            17068      94.62151
## 4      2.303681             1768      30.88235
## 5       3.20775               75      45.33333
## 6      3.623682             2282      95.83699
## 7      5.924657             5900      99.69492
## 8      4.297662              202      90.59406
## 9      1.897367               25            40
## 10 NA, 3.464102             1, 1      100, 100
#all_type_expr_table

A closer look at gene composition in Cluster 7 (used as an example). PCA is performed for Cytotoxic_cells to determine which gene(s) “drive” the signature. Excluding cell with only zeor counts. Furhter Normalization is required - Need to work on that.

knitr::opts_chunk$set(cache=TRUE)
all_list_gene<- get_signature_matrix (all_type_expr,7,"Cytotoxic_cells")
colnames(all_list_gene) <- gen[match(unlist(colnames(all_list_gene)),gen$id),]$symbol
alg_pca<-prcomp(all_list_gene)
summary(alg_pca)
## Importance of components%s:
##                           PC1    PC2    PC3    PC4     PC5    PC6     PC7
## Standard deviation     8.7936 5.8285 5.2541 4.9899 4.48346 3.9861 3.67443
## Proportion of Variance 0.3421 0.1503 0.1221 0.1102 0.08894 0.0703 0.05974
## Cumulative Proportion  0.3421 0.4925 0.6146 0.7248 0.81371 0.8840 0.94376
##                            PC8     PC9 PC10
## Standard deviation     3.45742 0.87048    0
## Proportion of Variance 0.05289 0.00335    0
## Cumulative Proportion  0.99665 1.00000    1
plot(alg_pca, type = "l")

alg_pca
## Standard deviations (1, .., p=10):
##  [1] 8.7935689 5.8285039 5.2541072 4.9899298 4.4834628 3.9860782 3.6744318
##  [8] 3.4574179 0.8704766 0.0000000
## 
## Rotation (n x k) = (10 x 10):
##                PC1          PC2          PC3          PC4           PC5
## CTSW  -0.050876357  0.064450843 -0.266535196  0.935181890  0.1345975494
## GNLY   0.962513591  0.187070782  0.146121342  0.063901415  0.0811692159
## GZMA   0.048840454 -0.044780733 -0.133907730  0.152381261 -0.0400913105
## GZMB   0.088441188 -0.125541620 -0.132864708 -0.026512722 -0.0152375302
## GZMH   0.050641375 -0.044374122  0.163583203  0.182051154 -0.9657662849
## PRF1   0.101475027  0.341239991 -0.856091055 -0.245492591 -0.2006149910
## NKG7   0.211639919 -0.904619540 -0.306574935 -0.030423399  0.0002883222
## KLRD1  0.055595212 -0.079572725  0.135507816  0.055429245  0.0215394720
## KLRB1  0.005285606  0.001572834  0.004616799  0.004609626  0.0063748678
## KLRK1  0.000000000  0.000000000  0.000000000  0.000000000  0.0000000000
##                PC6          PC7         PC8           PC9 PC10
## CTSW   0.013434041 -0.148839962 -0.08478107 -0.0036562987    0
## GNLY   0.058702898 -0.046504283  0.03015375 -0.0055591476    0
## GZMA  -0.058931103  0.818109881  0.52922009  0.0003209876    0
## GZMB  -0.936348129 -0.216648707  0.18472484 -0.0046448304    0
## GZMH   0.007425380 -0.051226751 -0.01260386  0.0046487401    0
## PRF1   0.049925164  0.034944441 -0.19007970  0.0030463462    0
## NKG7   0.188946442 -0.059107424 -0.05245696  0.0034637069    0
## KLRD1 -0.278940546  0.502046872 -0.79905868 -0.0180269393    0
## KLRB1 -0.009826444  0.007316746 -0.01304229  0.9997830720    0
## KLRK1  0.000000000  0.000000000  0.00000000  0.0000000000    1
loadings <- alg_pca$rotation
sdev <- alg_pca$sdev
var_coord <- var.cor <- t(apply(loadings, 1, var_cor_func, sdev))

test_1 <- all_type_expr[ all_type_expr$Cluster==7 & all_type_expr$Signature =="Cytotoxic_cells",] %>% group_by(Gene,Signature) %>% summarise(count=n_distinct(Barcode),EXP_mean=mean(as.numeric(Expression)),SD_mean = sd(as.numeric(Expression)),Abs_Exp=sum(as.numeric(Expression)))
test_1
## # A tibble: 10 x 6
## # Groups:   Gene [?]
##               Gene       Signature count  EXP_mean  SD_mean Abs_Exp
##             <fctr>          <fctr> <int>     <dbl>    <dbl>   <dbl>
##  1 ENSG00000100450 Cytotoxic_cells  4090  6.596822 3.684426   26981
##  2 ENSG00000100453 Cytotoxic_cells  4872  7.262931 3.795214   35385
##  3 ENSG00000105374 Cytotoxic_cells  5845  9.955004 5.375419   58187
##  4 ENSG00000111796 Cytotoxic_cells   411  2.289538 2.588221     941
##  5 ENSG00000115523 Cytotoxic_cells  5719 14.044938 8.436785   80323
##  6 ENSG00000134539 Cytotoxic_cells  1395  2.318280 2.553963    3234
##  7 ENSG00000145649 Cytotoxic_cells  4982  6.480329 3.599158   32285
##  8 ENSG00000172543 Cytotoxic_cells  4851  5.998557 3.573586   29099
##  9 ENSG00000180644 Cytotoxic_cells  3647  4.718124 3.577074   17207
## 10 ENSG00000213809 Cytotoxic_cells     7  1.000000 0.000000       7